# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes and tools used to handle the OpenCL backend.
* :class:`~hysop.gpu.tools.OpenClEnvironment`:
object handling opencl platform, device ... info.
* :func:`~hysop.gpu.tools.get_opengl_shared_environment`:
build or get an OpenCL environment with openGL properties.
* :func:`~hysop.gpu.tools.get_opencl_environment`:
build or get an OpenCL environment.
* :func:`~hysop.gpu.tools.explore`
explore system and display platform, devices, memory ... info.
"""
import sys, os, re, itertools, hashlib, gzip, hashlib
from hysop import (
__VERBOSE__,
__KERNEL_DEBUG__,
__DEFAULT_PLATFORM_ID__,
__DEFAULT_DEVICE_ID__,
)
from hysop import vprint
from hysop.backend.device.opencl import cl, __OPENCL_PROFILE__
from hysop.constants import np, Precision, DeviceType, HYSOP_REAL
from hysop.core.mpi import MPI
from hysop.tools.parameters import MPIParams
from hysop.tools.io_utils import IO
from hysop.tools.decorators import static_vars
from hysop.tools.htypes import check_instance, to_tuple, first_not_None
[docs]
class KernelError(Exception):
    """
    Custom exception for kernel errors.

    Attributes
    ----------
    msg :
        description of what went wrong (also forwarded to ``Exception``).
    err :
        error identifier, printed in front of the message.
    """

    def __init__(self, msg, err):
        # 'self' was missing from the signature, which made the exception
        # impossible to instantiate with (msg, err).
        super().__init__(msg)
        self.msg = msg
        self.err = err

    def __str__(self):
        # Formatting instead of '+' so that non-string 'err' values
        # (error codes, wrapped exceptions) do not raise a TypeError.
        return f"{self.err}: {self.msg}"
[docs]
def explore():
    """Scan system and print OpenCL environment details.

    For every platform returned by ``cl.get_platforms()`` the platform
    attributes are printed, followed by the attributes of each device
    returned by ``platform.get_devices()``.
    """
    print("OpenCL exploration : ")
    platforms = cl.get_platforms()
    platforms_info = ["name", "version", "vendor", "profile", "extensions"]
    devices_info = [
        "name",
        "version",
        "vendor",
        "profile",
        "extensions",
        "available",
        "type",
        "compiler_available",
        "double_fp_config",
        "single_fp_config",
        "global_mem_size",
        "global_mem_cache_type",
        "global_mem_cache_size",
        "global_mem_cacheline_size",
        "local_mem_size",
        "local_mem_type",
        "max_clock_frequency",
        "max_compute_units",
        "max_constant_buffer_size",
        "max_mem_alloc_size",
        "max_work_group_size",
        "max_work_item_dimensions",
        "max_work_item_sizes",
        "preferred_vector_width_double",
        "preferred_vector_width_float",
        "preferred_vector_width_int",
    ]
    # The loop previously iterated over the misspelled name 'plaforms',
    # which raised a NameError before anything was displayed.
    for pltfm in platforms:
        print("Platform:", pltfm.name)
        for pltfm_info in platforms_info:
            # getattr is the direct equivalent of eval("pltfm." + name)
            # without evaluating a dynamically built source string.
            print(" |-", pltfm_info, ":", getattr(pltfm, pltfm_info))
        devices = pltfm.get_devices()
        for dvc in devices:
            print(" |- Device:", dvc.name)
            for dvc_info in devices_info:
                print(" |-", dvc_info, ":", getattr(dvc, dvc_info))
[docs]
def convert_device_type(device_type):
    """
    Map a hysop DeviceType onto the matching pyopencl ``cl.device_type``.

    ``None`` is passed through unchanged. Any other value must be a
    DeviceType instance (enforced by check_instance); a ValueError is
    raised when it has no entry in the lookup table below.
    """
    if device_type is None:
        return None

    check_instance(device_type, DeviceType)

    cl_types = {
        DeviceType.ALL: cl.device_type.ALL,
        DeviceType.ACCELERATOR: cl.device_type.ACCELERATOR,
        DeviceType.CPU: cl.device_type.CPU,
        DeviceType.GPU: cl.device_type.GPU,
        # DeviceType.CUSTOM: cl.device_type.CUSTOM,
        DeviceType.DEFAULT: cl.device_type.DEFAULT,
    }

    if device_type in cl_types:
        return cl_types[device_type]

    raise ValueError(f"Unknown device type {device_type}.")
[docs]
def convert_precision(precision):
    """
    Map a hysop Precision onto the matching numpy dtype.

    ``None`` is passed through unchanged. Precision.SAME raises a
    ValueError and Precision.QUAD a RuntimeError because neither has a
    numpy counterpart; a value missing from the lookup table raises a
    ValueError as well.
    """
    if precision is None:
        return None

    check_instance(precision, Precision)

    # Values that cannot be expressed as a numpy dtype are rejected first.
    if precision == Precision.SAME:
        raise ValueError("Cannot convert Precision.SAME to numpy dtype.")
    if precision == Precision.QUAD:
        raise RuntimeError(
            "Numpy does not support the 128-bit IEEE quad precision data type."
        )

    # TODO when long double will be supported check if device has
    # np.float96 or np.float128 long doubles
    # (ie padded to 3*32bits or 2*64bits)
    dtypes = {
        Precision.DEFAULT: HYSOP_REAL,
        Precision.LONG_DOUBLE: np.longdouble,
        Precision.DOUBLE: np.float64,
        Precision.FLOAT: np.float32,
        Precision.HALF: np.float16,
    }

    if precision in dtypes:
        return dtypes[precision]

    raise ValueError(f"Unknown precision {precision}.")
[docs]
@static_vars(opencl_environments=dict())
def get_device_number(platform_id=None):
    """
    Return the number of devices exposed by an OpenCL platform.

    platform_id falls back to __DEFAULT_PLATFORM_ID__ when it is None.
    The platform is looked up with strict=True.
    """
    selected_id = first_not_None(platform_id, __DEFAULT_PLATFORM_ID__)
    devices = get_platform(selected_id, strict=True).get_devices()
    return len(devices)
[docs]
@static_vars(opencl_environments=dict())
def get_or_create_opencl_env(
    mpi_params,
    platform_id=None,
    device_id=None,
    device_type=None,
    gl_sharing=False,
    **kargs,
):
    """
    Return the OpenClEnvironment matching the given parameters, creating
    it if it does not already exist.

    All environments are kept alive (cached) in a dictionary attached to
    this function, so that all callers asking for the same
    (mpi_params, platform_id, device_id, device_type, gl_sharing)
    combination share one OpenClEnvironment.

    platform_id, device_id and device_type default respectively to
    __DEFAULT_PLATFORM_ID__, __DEFAULT_DEVICE_ID__ and DeviceType.ALL.
    Extra keyword arguments are forwarded to OpenClEnvironment on
    creation; they are not part of the cache key.
    """
    platform_id = first_not_None(platform_id, __DEFAULT_PLATFORM_ID__)
    device_id = first_not_None(device_id, __DEFAULT_DEVICE_ID__)
    device_type = first_not_None(device_type, DeviceType.ALL)

    for value, expected in (
        (mpi_params, MPIParams),
        (platform_id, int),
        (device_id, int),
    ):
        check_instance(value, expected)
    check_instance(device_type, DeviceType, allow_none=True)
    check_instance(gl_sharing, bool)

    cache = get_or_create_opencl_env.opencl_environments
    cache_key = (mpi_params, platform_id, device_id, device_type, gl_sharing)

    if cache_key not in cache:
        # Imported only when an environment really has to be built.
        from hysop.backend.device.opencl.opencl_env import OpenClEnvironment

        cache[cache_key] = OpenClEnvironment(
            platform_id=platform_id,
            device_id=device_id,
            device_type=device_type,
            gl_sharing=gl_sharing,
            mpi_params=mpi_params,
            **kargs,
        )

    return cache[cache_key]
[docs]
def create_queue(ctx, props=None):
    """
    Returns OpenCL queue from context

    Parameters
    ----------
    ctx : OpenCL context
        context the command queue is created on.
    props : optional
        command queue properties. When None, profiling is enabled if
        __OPENCL_PROFILE__ is set, otherwise the queue is created
        without explicit properties.
    """
    if props is None and __OPENCL_PROFILE__:
        props = cl.command_queue_properties.PROFILING_ENABLE

    if props is None:
        return cl.CommandQueue(ctx)

    # Properties have to be given by keyword: the second positional
    # parameter of cl.CommandQueue is the device, not the properties.
    return cl.CommandQueue(ctx, properties=props)
[docs]
def get_work_items(resolution, vector_width=1):
    """Compute the work-item number and the OpenCL index spaces.

    Parameters
    ----------
    resolution : tuple
        local mesh resolution
    vector_width : int
        OpenCL vector types width

    Returns
    -------
    int : work-item number
    tuple : global space index
    tuple : local space index

    Notes
    -----
    The starting point is 64 work-items for a 3D resolution and 256
    otherwise, capped by the smallest extent of the resolution.
    The value is then halved until it divides the first extent, and
    finally reduced to ``resolution[0] // vector_width`` when
    ``work-items * vector_width`` exceeds the first extent (a ValueError
    is raised if the first extent is not a multiple of vector_width).
    """
    is_3d = len(resolution) == 3
    preferred = 64 if is_3d else 256
    smallest = min(resolution)
    first_extent = resolution[0]

    # Preferred work-item number, capped by the smallest extent.
    work_items = preferred if smallest >= preferred else smallest

    # Shrink until the work-item number divides the first extent.
    if first_extent % work_items > 0:
        print("Warning : GPU best performances obtained for")
        if is_3d:
            print("problem sizes multiples of 64")
        else:
            print("problem sizes multiples of 256")
        while first_extent % work_items > 0:
            work_items = work_items // 2

    # Account for the vector width.
    if work_items * vector_width > first_extent:
        if first_extent % vector_width > 0:
            raise ValueError(
                "Resolution ({}) must be a multiple of {}".format(
                    first_extent, vector_width
                )
            )
        work_items = first_extent // vector_width

    if is_3d:
        other_extents = (int(resolution[1]), int(resolution[2]))
    else:
        other_extents = (int(resolution[1]),)

    leading = (int(work_items),)
    gwi = leading + other_extents
    lwi = leading + (1,) * len(other_extents)
    return work_items, gwi, lwi
[docs]
def get_device(platform, device_id, device_type, strict):
    """Returns an OpenCL device

    Parameters
    ----------
    platform : cl.Platform
        chosen platform.
    device_id : int
        chosen device id, used as an index in the platform device list.
    device_type :
        chosen OpenCL device type. When different from
        cl.device_type.ALL it is passed to ``platform.get_devices`` to
        filter the device list.
    strict: bool
        If set to true, raise an error if the device does not exist.
        Else fallback to the first platform device.

    Returns
    -------
    The selected OpenCL device.

    Raises
    ------
    IndexError
        if strict is set and device_id is out of range.
    Exception
        if strict is set, any other error raised while querying the
        devices is re-raised unchanged.
    """
    try:
        if device_type != cl.device_type.ALL:
            devices = platform.get_devices(device_type)
            device = devices[device_id]
        else:
            device = platform.get_devices()[device_id]
    except IndexError:
        msg = f"\nIncorrect device_id {device_id}"
        msg += f"\nThere is only {len(platform.get_devices())} devices available."
        if strict:
            msg += "\nFATAL ERROR: Strict device_id condition violated.\n"
            print(msg)
            raise
        else:
            msg += f"\nGetting first device of type {device_type}."
            vprint(msg)
            # NOTE: the fallback takes the first device of the platform,
            # whatever its type.
            device = platform.get_devices()[0]
    except Exception:
        # 'except Exception' instead of a bare 'except' so that
        # KeyboardInterrupt and SystemExit are never swallowed by the
        # non-strict fallback.
        msg = f"\nCould not get a device of type {device_type}"
        if strict:
            msg += "\nFATAL ERROR: Strict device_type condition violated.\n"
            vprint(msg)
            raise
        else:
            msg += "\nGetting first device in platform."
            vprint(msg)
            device = platform.get_devices()[0]
    return device
[docs]
@static_vars(contexts={})
def get_context(devices, gl_sharing):
    """Returns OpenCL context

    Parameters
    ----------
    devices: OpenCL device or tuple of devices
        which handles the context.
    gl_sharing : bool
        True to build a context shared between OpenGL and OpenCL.

    Returns
    -------
    The OpenCL context built on the given devices.

    Notes
    -----
    Contexts are cached in a dictionary attached to this function: one
    context is created per distinct (devices, gl_sharing) combination and
    returned again on subsequent calls.
    """
    props = None
    devices = to_tuple(devices)
    contexts = get_context.contexts
    key = devices + (gl_sharing,)
    if key in contexts:
        return contexts[key]
    if gl_sharing:
        from pyopencl.tools import get_gl_sharing_context_properties

        if sys.platform == "darwin":
            props = get_gl_sharing_context_properties()
        else:
            # This is a free function: there is no 'self' to read the
            # platform from (it raised a NameError). The platform is
            # taken from the first device instead.
            platform = devices[0].platform
            props = [
                (cl.context_properties.PLATFORM, platform)
            ] + get_gl_sharing_context_properties()
        ctx = cl.Context(properties=props, devices=devices)
    else:
        print("PyOpenCL Context devices:", devices)
        ctx = cl.Context(devices=devices)
    contexts[key] = ctx
    return ctx
[docs]
def parse_opencl_file(f, n=8, nb_remesh_components=1):
    """Expand the template flags found in OpenCL sources.

    Parameters
    ----------
    f : file-like object
        opened source file, read through ``f.readlines()``
    n : int, optional
        vector width, default=8
    nb_remesh_components : int
        number of remeshed components

    Returns
    -------
    string, the parsed sources.

    Notes
    -----
    Line by line expansion:

    * a line containing both '(float__N__)(' and '__NN__' whose last
      character before the line break is ',' gets its element duplicated
      for each vector component, separated by ','.
    * any other line containing '__NN__' whose last character before the
      line break is ';' is duplicated once per vector component.
    * '__N__' is then replaced by the vector width.

    Expansion on the whole resulting text:

    * '__RCOMP_I <instruction>;' is replaced by the instruction repeated
      for each remeshed component, '__ID__' being replaced by the
      component index.
    * '__RCOMP_P <parameter>' (followed by ',' or ')') is replaced by the
      parameter repeated for each component and joined with ', '.

    Examples with a 4-width vector code::

        float__N__ x;          ->  float4 x;
        x.s__NN__ = 1.0f;      ->  x.s0 = 1.0f;
                                   x.s1 = 1.0f;
                                   x.s2 = 1.0f;
                                   x.s3 = 1.0f;

    Examples with a 2 components expansion code::

        __RCOMP_I var__ID__[i] = 0.0;
            ->  var0[i] = 0.0; var1[i] = 0.0;
        aFunction(__RCOMP_P var__ID__, __RCOMP_P other__ID__);
            ->  aFunction( var0,  var1,  other0,  other1);
    """
    float_ctor_re = re.compile(r"\(float__N__\)\(")
    component_re = re.compile(r"__NN__")
    width_re = re.compile(r"__N__")
    width = str(n)

    expanded_lines = []
    for line in f.readlines():
        if (
            float_ctor_re.search(line)
            and component_re.search(line)
            and line[-2] == ","
        ):
            # floatN constructor: repeat the element for each component.
            pieces = line.split("(float__N__)(")
            element = pieces[1].rsplit(",", 1)[0]
            items = "".join(
                component_re.sub(str(k), element) + "," for k in range(n)
            )
            # The last character (trailing ',') is dropped before the
            # line break is restored.
            line = (pieces[0] + "(float" + width + ")(" + items)[:-1] + "\n"
        elif component_re.search(line) and line[-2] == ";":
            # Component access: one copy of the whole line per component.
            line = "".join(component_re.sub(str(k), line) for k in range(n))
        # Vector length substitution.
        expanded_lines.append(width_re.sub(width, line))
    src = "".join(expanded_lines)

    def expand_ids(fragment, separator):
        # One copy of 'fragment' per remeshed component, '__ID__' being
        # replaced by the component index.
        return separator.join(
            fragment.replace("__ID__", str(k))
            for k in range(nb_remesh_components)
        )

    # __RCOMP_I ...;
    instruction_re = re.compile(r"__RCOMP_I([\w\s\.,()\[\]+*/=-]+;)")
    # __RCOMP_P ..., or __RCOMP_P ...)
    parameter_re = re.compile(r"__RCOMP_P([\w\s\.\[\]+*/=-]+(?=,|\)))")

    src = instruction_re.sub(lambda m: expand_ids(m.group(1), ""), src)
    src = parameter_re.sub(lambda m: expand_ids(m.group(1), ", "), src)
    return src